package rs;

import java.io.IOException;
import java.net.MalformedURLException;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import rs.crawlerUtil.HttpProxy;
import rs.crawlerUtil.ProxyMgr;
import rs.util.Log;

/**
 * Fetches a web page with jsoup, optionally through an HTTP proxy, and
 * falls back to a direct (proxy-less) request when the first attempt fails.
 * The outcome of a proxied attempt is reported to {@link ProxyMgr}.
 */
public class Crawler {
	
	/** User-Agent header sent when the caller does not supply one. */
	private static final String DEFAULT_USER_AGENT =
			"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.16) Gecko/20110319 Firefox/3.6.16";
	
	/** Set only by the deprecated constructors; not read by any method of this class. */
	public String url;
	/** Set only by the deprecated constructors; not read by any method of this class. */
	public String proxy;
	
	/**
	 * Creates a crawler. The target URL (and proxy) are passed to
	 * {@code work(...)} on each call.
	 */
	public Crawler(){
		
	}
	
	/**
	 * @deprecated the stored URL is not used by {@link #work(String)} or
	 *             {@link #work(String, HttpProxy, String)}; use {@link #Crawler()}
	 *             and pass the URL to {@code work(...)} instead.
	 * @param url stored in {@link #url}
	 */
	@Deprecated
	public Crawler(String url){
		this.url = url;
	}
	
	/**
	 * @deprecated the stored URL and proxy are not used by {@link #work(String)} or
	 *             {@link #work(String, HttpProxy, String)}; use {@link #Crawler()}
	 *             and pass them to {@code work(...)} instead.
	 * @param url stored in {@link #url}
	 * @param proxy stored in {@link #proxy}
	 */
	@Deprecated
	public Crawler(String url, String proxy){
		this.url = url;
		this.proxy = proxy;
	}
	
	/**
	 * Fetches {@code url} using the proxy returned by {@link ProxyMgr#getProxy()}
	 * and the default User-Agent.
	 * 
	 * @param url address of the page to fetch
	 * @return the parsed document, or {@code null} if both the proxied and the
	 *         direct attempt failed
	 * @throws MalformedURLException kept in the signature for existing callers;
	 *         the fetch itself reports failure by returning {@code null}
	 * @throws IOException kept in the signature for existing callers;
	 *         the fetch itself reports failure by returning {@code null}
	 */
	public Document work(String url) throws MalformedURLException, IOException{
		return work(url, ProxyMgr.getProxy(), null);
	}
	
	/**
	 * Fetches {@code url}, first through {@code proxy} and, if that attempt
	 * throws, once more without any proxy.
	 * <p>
	 * Proxy bookkeeping: a successful first attempt reports the proxy as good;
	 * the proxy is reported as failed only when the first attempt failed
	 * <em>and</em> the direct retry succeeded. When both attempts fail nothing
	 * is reported for the proxy.
	 * <p>
	 * Note that {@code Jsoup.connect(url)} is invoked outside the try block, so
	 * an exception thrown while creating the connection propagates to the caller.
	 * 
	 * @param url address of the page to fetch
	 * @param proxy proxy for the first attempt; may be {@code null}, in which
	 *        case the fallback is simply a second direct attempt
	 * @param userAgent User-Agent header value, or {@code null} to use the default
	 * @return the parsed document, or {@code null} if both attempts failed
	 */
	public Document work(String url, HttpProxy proxy, String userAgent){
		Connection con = Jsoup.connect(url);
		if(userAgent == null){
			userAgent = DEFAULT_USER_AGENT;
		}
		con.userAgent(userAgent)
			.timeout(Conf.CRAWLER_HTTP_TIMEOUT)
			.proxy(proxy);
		Log.info("CRAWLING "+ url +"\n with PROXY: " + proxy);
		Document ret = null;
		try {
			ret = con.get();
			if(proxy != null){
				ProxyMgr.onProxyResult(proxy.getHostName(), true);
			}
		} catch (Exception e) {
			// First attempt failed: retry on the same connection with the proxy cleared.
			try {
				ret = con.proxy(null).get();
				// The direct request worked, so the failure is attributed to the proxy.
				if(proxy != null){
					ProxyMgr.onProxyResult(proxy.getHostName(), false);
					Log.info("PROXY Fail: " + proxy.toString() + " on " + url + "\n with error:" + e.getMessage());
				}
			} catch (Exception e1) {
				// Both attempts failed; signal failure to the caller with null.
				Log.info("ERROR: fail to get " + url + "\n with error:" + e1.getMessage());
				return null;
			}
		}
		return ret;
	}
	
	
	/**
	 * Manual smoke test: fetches a fixed URL through a fixed proxy and prints
	 * the resulting HTML, or an error message if the page could not be fetched.
	 * 
	 * @param args unused
	 * @throws Exception propagated from the proxy/crawler helpers
	 */
	public static void main(String[] args) throws Exception {
		//String u = "http://yijuchung.blogspot.com/2010/10/http-header-about-proxy.html";
		//String p = "172.19.1.2:8217";
		//String p = "127.0.0.1:1080";
		//String u = "http://dsc.taobaocdn.com/i6/910/671/9146792157/T10tiXXmVeXXXXXXXX.desc|var%5Edesc%3Bsign%5E0841ff0f333841fd4f1e05f91d3d9fbe%3Blang%5Egbk%3Bt%5E1303954731";
		String u = "http://www.baidu.com/";
		String p = "125.77.194.103:80";
		HttpProxy proxy = ProxyMgr.genProxy(p);
		Crawler c = new Crawler();
		Document doc = c.work(u, proxy, null);
		// work(...) returns null when both attempts fail; avoid dereferencing it.
		if(doc == null){
			System.err.println("Failed to fetch " + u);
			System.exit(1);
		}
		System.out.println(doc.html());
		//System.out.println(c.work(u).select("body"). html());
	}
}
